9a0ab8336353d436ded86d6e19954efa1d43a223,experiment/src/main/java/zemberek/corpus/WebDocument.java,WebDocument,fromText,#String#List#,75
Before Change
String id = url.replaceAll("http://|https://", "");
String source = Regexps.firstMatch(sourcePattern, meta, 2);
String crawlDate = Regexps.firstMatch(crawlDatePattern, meta, 2);
String labels = Regexps.firstMatch(labelPattern, meta, 2).replace('\"', ' ').trim();
String category = Regexps.firstMatch(categoryPattern, meta, 2).replace('\"', ' ').trim();
String title = Regexps.firstMatch(titlePattern, meta, 2).replace('\"', ' ').trim();
After Change
String id = url.replaceAll("http://|https://", "");
String source = Regexps.firstMatch(sourcePattern, meta, 2);
String crawlDate = Regexps.firstMatch(crawlDatePattern, meta, 2);
String labels = getAttribute(Regexps.firstMatch(labelPattern, meta, 2));
String category = getAttribute(Regexps.firstMatch(categoryPattern, meta, 2));
String title = getAttribute(Regexps.firstMatch(titlePattern, meta, 2));